# load libraries
library(tidyverse)
library(ggtree)
library(pheatmap)
library(igraph)
library(regentrans)
library(tidygraph)
library(ggraph)
library(exact2x2)
library(cowplot)
library(readxl)
library(ape)
library(phytools)
library(ggtree)
library(ggtext)
library(lubridate)
# set plotting theme
theme_set(
theme_bw() +
theme(text = element_text(size = 10),
axis.text.x = element_text(angle = 0),
strip.background = element_rect(fill="white",linetype='blank')))
# load transition edge script
source('code/get_transition_edges.R')
# set seed
set.seed(0)
# load in data
# facility and ST
facil_st <- read_csv('data/isolate_data.csv')
facil_st_mn <- facil_st %>% filter(State == 'MN')
write_csv(facil_st_mn, 'data/mn_isolate_data.csv')
# public isolate data
metadat_complete <- read_csv('data/public_isolate_metadata.csv')
# public isolate trees
tree_list <- read_rds('data/tree_list.rds')
# pairwise snv distances
kp_dists <- read_rds('data/kp_dists.rds')
ec_dists <- read_rds('data/eh_dists.rds')
# phylogenetic trees
tr_kp <- read.tree('data/kp_tr.tree')
tr_ec <- read.tree('data/eh_tr.tree')
# clinical factors
pt_feats_ct <- read_csv('data/pt_feats_ct.csv')
pt_feats_mn <- read_csv('data/pt_feats_mn.csv')
pt_feats_tn <- read_csv('data/pt_feats_tn.csv')
# shared facility exposures
pr_hosp_pairs_ct <- read_csv('data/pr_hosp_pairs_ct.csv')
pr_hosp_pairs_mn <- read_csv('data/pr_hosp_pairs_mn.csv')
pr_hosp_pairs_tn <- read_csv('data/pr_hosp_pairs_tn.csv')
# patient transfer networks
ct_trans_net_sub <- read_csv('data/trans_net_ct.csv')
mn_trans_net_sub <- read_csv('data/trans_net_mn.csv')
tn_trans_net_sub <- read_csv('data/trans_net_tn.csv')
# facility types
facil_types <- read_csv('data/facil_types.csv')
# facility counts
facil_type_counts <- read_csv('data/facil_type_counts.csv')
# facil_type_counts_wgs <- read_csv('data/facil_type_counts_wgs.csv')
# facility w prior hospitals
facil_w <- read_csv('data/facil_w_st171_prior_hosps.csv')
# prior tn hospitals
prior_hosps_tn <- read_csv('data/pr_hosps_tn.csv')
# tn facil clutser
tn_facil_cluster <- read_csv('data/tn_facil_cluster.csv')
# dominant STs and colors
st_cts <- rev(sort(table(facil_st$ST)))
dominant_sts <- names(st_cts)[st_cts > 10 & names(st_cts) != 'Other species']
st_cols <- rep(NA, length(st_cts))
names(st_cols) <- names(st_cts)
st_cols['K. pneumoniae ST258'] <- 'coral4'
st_cols['K. pneumoniae ST307'] <- 'coral3'
st_cols['E. hormaechei ST171'] <- 'aquamarine4'
st_cols['E. hormaechei ST114'] <- 'aquamarine3'
st_cols[is.na(st_cols)] <- 'grey'
st_cols['Other'] <- 'grey'
Data summary
Number of facilities with CRE per state
facil_st %>%
group_by(State) %>%
summarize(n_facil = n_distinct(Facility_ID))
## # A tibble: 3 × 2
## State n_facil
## <chr> <int>
## 1 CT 22
## 2 MN 60
## 3 TN 40
Importation vs. HGT
kpc_bin <- metadat_complete %>% select(isolate_no, kpc) %>% drop_na()
mn_bin <- metadat_complete %>% select(isolate_no, mn_bin) %>% drop_na()
tn_bin <- metadat_complete %>% select(isolate_no, tn_bin) %>% drop_na()
ct_bin <- metadat_complete %>% select(isolate_no, ct_bin) %>% drop_na()
genomes <- unique(c(kpc_bin$isolate_no, mn_bin$isolate_no, tn_bin$isolate_no, ct_bin$isolate_no))
transitions <- lapply(1:length(tree_list), function(x){
st <- names(tree_list)[[x]]
t <- midpoint.root(tree_list[[x]])
t <- keep.tip(t, t$tip.label[t$tip.label %in% genomes])
kpc_trans_edges = mn_trans_edges = tn_trans_edges = ct_trans_edges = NULL
kpc_bin_sub <- kpc_bin %>% filter(isolate_no %in% t$tip.label)
if(length(unique(kpc_bin_sub$kpc)) != 1){
kpc_trans_edges <- transition_edge_pipeline(kpc_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
}
mn_bin_sub <- mn_bin %>% filter(isolate_no %in% t$tip.label)
if(length(unique(mn_bin_sub$mn_bin)) != 1){
mn_trans_edges <- transition_edge_pipeline(mn_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
}
tn_bin_sub <- tn_bin %>% filter(isolate_no %in% t$tip.label)
if(length(unique(tn_bin_sub$tn_bin)) != 1){
tn_trans_edges <- transition_edge_pipeline(tn_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
}
ct_bin_sub <- ct_bin %>% filter(isolate_no %in% t$tip.label)
if(length(unique(ct_bin_sub$ct_bin)) != 1){
ct_trans_edges <- transition_edge_pipeline(ct_bin_sub, t, continuous = FALSE, ar_conf = 0.5)
}
if(is.null(kpc_trans_edges) & is.null(mn_trans_edges) & is.null(tn_trans_edges) & is.null(ct_trans_edges)) return(NULL)
trans_sum <- paste0(
ifelse(mn_trans_edges$mn_bin$trans_dir != 1, '', 'to_mn '),
ifelse(tn_trans_edges$tn_bin$trans_dir != 1, '', 'to_tn '),
ifelse(ct_trans_edges$ct_bin$trans_dir != 1, '', 'to_ct '),
ifelse(kpc_trans_edges$kpc$trans_dir != 1, '', 'gain_kpc ')
)
edge <- data.frame(t$edge,
edge_num=1:nrow(t$edge),
edge_val = trans_sum[1:(length(trans_sum)-1)]) %>%
mutate(edge_val = ifelse(edge_val == '', NA, edge_val))
colnames(edge)=c("parent", "node", "edge_num","edge_val")
trans_edge_nums <- sapply(unique(metadat_complete$isolate_no[grepl('^EIP',metadat_complete$sources) &
metadat_complete$isolate_no %in% t$tip.label]),
function(i){
trans <- NA
child <- which(t$tip.label == i)
edge_num <- NA
while(is.na(trans) & child != (Ntip(t) + 1)){ # if find transition going backwards or get to root
child_edge_info <- edge %>% filter(node == child)
trans <- child_edge_info %>% select(edge_val) %>% deframe()
child <- child_edge_info %>% select(parent) %>% deframe()
if(!is.na(trans)){
edge_num <- child_edge_info %>% select(edge_num) %>% deframe()
}
}
edge_num
})
trans_edge_nums <- trans_edge_nums[!is.na(trans_edge_nums)] #recently added
grps <- data.frame(id=names(trans_edge_nums),
class=trans_sum[trans_edge_nums],
cluster=trans_edge_nums) %>%
mutate(state = factor(gsub('_.*','',id), levels = c('MN','TN','CT')),
ST = sapply(id, function(x){
val <- unlist(unique(metadat_complete$ST[metadat_complete$isolate_no == x & !is.na(metadat_complete$ST)]))
val <- ifelse(length(val) == 0, paste0('ST', unlist(unique(metadat_complete$mlst[metadat_complete$isolate_no == x & !is.na(metadat_complete$mlst)]))), val)
val <- ifelse(length(val) == 0 | val == 'ST', st, val)
ifelse(length(val) == 0, NA, val)
}),
species = sapply(id, function(x){
val <- unlist(unique(metadat_complete$species[metadat_complete$isolate_no == x & !is.na(metadat_complete$species)]))
ifelse(length(val) == 0, NA, val)
}),
ST_orig = ST,
ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
'K. pneumoniae ST258','K. pneumoniae ST307',
'E. cloacae ST171', 'E. cloacae ST114',
'ST258', 'ST307', 'ST114', 'ST171'),
ST, 'Other'),
tree = st
)
if(nrow(grps) == 0) return(NULL)
grps
}) %>%
bind_rows() %>% suppressWarnings()
transitions <- transitions %>%
mutate(class = gsub('NA| ', '', class),
class = ifelse(is.na(class), '', class),
hgt_type = ifelse(grepl('gain_kpc', class), 'Unknown', ''),
hgt_type = ifelse(class == 'gain_kpc', 'In-state', hgt_type),
class2 = ifelse(toupper(gsub('to_','',class)) != state & !grepl('gain_kpc', class), '', class),
class2 = ifelse(grepl('gain_kpc', class2), 'Putative\nHGT', class2),
class2 = ifelse(class2 == '', 'Unknown', class2),
class2 = ifelse(grepl('to_', class2), 'Putative\nimportation', class2),
class2 = factor(class2, levels = c('Putative\nimportation','Putative\nHGT','Unknown')),
ST = ifelse(ST %in% c('ST258','ST307'), paste0('K. pneumoniae ', ST), ST),
ST = ifelse(ST %in% c('ST171','ST114'), paste0('E. hormaechei ', ST), ST),
ST = gsub('cloacae', 'hormaechei', ST),
ST_orig = gsub('cloacae', 'hormaechei', ST_orig)
)
keepers <- facil_st %>%
filter(!(duplicated(Patient_ID) & duplicated(ST))) %>%
pull(Sample_ID)
Trees used (Figure S3)
states <- metadat_complete %>% select(isolate_no, state) %>% deframe()
states <- unique(states[c(tree_list[[1]]$tip.label, tree_list[[4]]$tip.label, tree_list[[7]]$tip.label, tree_list[[12]]$tip.label)])
states <- states[!is.na(states)]
state_cols <- c('#e6194b', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000', '#ffe119')
names(state_cols) <- states
plot_tr_locs <- function(tr, og, title = NULL){
tr <- root(tr, og)
tr <- drop.tip(tr, og)
states <- metadat_complete %>% select(isolate_no, state) %>% deframe()
states <- states[tr$tip.label]
kpc <- metadat_complete %>% select(isolate_no, kpc) %>% deframe()
kpc <- kpc[tr$tip.label]
no_info <- is.na(states) & is.na(kpc)
states <- states[!no_info]
kpc <- kpc[!no_info]
kpc <- ifelse(kpc == 1, 'Present', 'Absent')
tr <- keep.tip(tr, union(names(states), names(kpc)))
states <- c(states, rep(NA, Nnode(tr)))
kpc <- c(kpc, rep(NA, Nnode(tr)))
ggtree(tr) +
geom_tippoint(aes(col = states, shape = kpc)) +
scale_color_manual(values = state_cols) +
scale_shape_manual(values = c('Present' = 16, 'Absent' = 1)) +
guides(color = guide_legend(ncol = 2)) +
labs(title = title, shape = 'KPC', col = 'State')
}
tr_st114 <- plot_tr_locs(tree_list[[7]], 'MN_CRE226', '*E. hormaechei* ST114') +
theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st171 <- plot_tr_locs(tree_list[[4]], 'MN_CRE226', '*E. hormaechei* ST171') +
theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st258 <- plot_tr_locs(tree_list[[1]], '573.1451', '*K. pneumoniae* ST258') +
theme(legend.position = 'none', plot.title = ggtext::element_markdown())
tr_st307 <- plot_tr_locs(tree_list[[12]], '573.13662', '*K. pneumoniae* ST307') +
theme(legend.position = 'none', plot.title = ggtext::element_markdown())
leg <- get_legend(plot_tr_locs(tree_list[[7]], 'MN_CRE226', 'E. hormaechei ST114'))
fs3 <- plot_grid(plot_grid(tr_st171, tr_st114, tr_st258, tr_st307), leg)
ggsave(plot = fs3, filename = 'figures/FigS3.png', width = 8, height = 8)
fs3

Importation vs. HGT (Figure 1)
clust_cols <- st_cols[transitions$ST]
names(clust_cols) <- paste0(transitions$state, transitions$ST, transitions$cluster)
clust_cols <- (c(clust_cols[clust_cols == "grey"],
clust_cols[clust_cols == "coral3"],
clust_cols[clust_cols == "coral4"],
clust_cols[clust_cols == "aquamarine3"],
clust_cols[clust_cols == "aquamarine4"]
))
transitions <- transitions %>% filter(id %in% keepers) %>%
mutate(ST = factor(ST, levels = (c("E. hormaechei ST171", "E. hormaechei ST114",
"K. pneumoniae ST258", "K. pneumoniae ST307",
'Other'))),
state = factor(state, levels = c('CT', 'MN', 'TN'))) %>%
arrange(ST)
f1 <- transitions %>%
ggplot(aes(x = class2,
fill = factor(paste0(state, ST, cluster),
levels = unique(paste0(state, ST, cluster))))) +
geom_bar(color = 'white', position = position_stack(reverse = TRUE)) +
scale_fill_manual(values = clust_cols,
breaks = rev(names(clust_cols)[!duplicated(clust_cols)]),
labels = c('*E. hormaechei* ST171', '*E. hormaechei* ST114',
'*K. pneumoniae* ST258', '*K. pneumoniae* ST307',
'Other'),
drop = FALSE) +
facet_grid(~state, scales = 'free', space = 'free') +
labs(fill = 'ST cluster', x = 'Cluster type', y = 'Number of isolates') +
theme(legend.text = ggtext::element_markdown())
ggsave(plot = f1, filename = 'figures/Fig1.png', width = 7, height = 4)
f1

transitions %>%
group_by(ST, class2) %>%
summarize(count = n()) %>%
group_by(ST) %>%
mutate(tot = sum(count),
prop = count/tot)
## # A tibble: 12 × 5
## # Groups: ST [5]
## ST class2 count tot prop
## <fct> <fct> <int> <int> <dbl>
## 1 E. hormaechei ST171 "Putative\nimportation" 67 71 0.944
## 2 E. hormaechei ST171 "Unknown" 4 71 0.0563
## 3 E. hormaechei ST114 "Putative\nimportation" 9 23 0.391
## 4 E. hormaechei ST114 "Putative\nHGT" 14 23 0.609
## 5 K. pneumoniae ST258 "Putative\nimportation" 63 66 0.955
## 6 K. pneumoniae ST258 "Unknown" 3 66 0.0455
## 7 K. pneumoniae ST307 "Putative\nimportation" 1 20 0.05
## 8 K. pneumoniae ST307 "Putative\nHGT" 17 20 0.85
## 9 K. pneumoniae ST307 "Unknown" 2 20 0.1
## 10 Other "Putative\nimportation" 36 114 0.316
## 11 Other "Putative\nHGT" 27 114 0.237
## 12 Other "Unknown" 51 114 0.447
Distances
# snv distances
kp_snv_dists <- get_pair_types(kp_dists, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>%
filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>%
mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>%
filter(st1 == st2 & !grepl('novel',st1)) %>%
mutate(state_pair = paste(state1, state2))
ec_snv_dists <- get_pair_types(ec_dists, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>%
filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>%
mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>%
filter(st1 == st2 & !grepl('novel',st1)) %>%
mutate(state_pair = paste(state1, state2))
snv_dat <- bind_rows(kp_snv_dists, ec_snv_dists) %>% filter(state1 == state2) %>%
mutate(state1 = factor(state1, levels = c('MN','TN','CT')),
pair_type = ifelse(is.na(pair_type), 'Unknown', pair_type),
under_thresh = ifelse(pairwise_dist <= 15, '(10,15]','(15,∞)'),
under_thresh = ifelse(pairwise_dist <= 10, '(5,10]',under_thresh),
under_thresh = ifelse(pairwise_dist <= 5, '[0,5]',under_thresh),
under_thresh = factor(under_thresh, levels = c('[0,5]','(5,10]','(10,15]','(15,∞)')),
ST_orig = st1,
ST = ifelse(st1 %in% dominant_sts, st1, 'Other'),
Species = gsub(' ST.*','',st1),
ST = gsub('.* ','',ST))
# patristic distances
kp_pat <- cophenetic.phylo(tr_kp)
ec_pat <- cophenetic.phylo(tr_ec)
kp_pat_dists <- get_pair_types(kp_pat, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>%
filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>%
mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>%
filter(st1 == st2 & !grepl('novel',st1)) %>%
mutate(state_pair = paste(state1, state2))
ec_pat_dists <- get_pair_types(ec_pat, facil_st %>% select(Sample_ID, Facility_ID) %>% deframe(), pt = NULL) %>%
filter(!sapply(1:nrow(.), function(x) sort(c(as.character(isolate1[x]),as.character(isolate2[x])))) %>% t() %>% duplicated()) %>%
mutate(st1 = sapply(isolate1, function(x) facil_st$ST[facil_st$Sample_ID == x]),
st2 = sapply(isolate2, function(x) facil_st$ST[facil_st$Sample_ID == x]),
state1 = sapply(isolate1, function(x) facil_st$State[facil_st$Sample_ID == x]),
state2 = sapply(isolate2, function(x) facil_st$State[facil_st$Sample_ID == x])) %>%
filter(st1 == st2 & !grepl('novel',st1)) %>%
mutate(state_pair = paste(state1, state2))
pat_dat <- bind_rows(kp_pat_dists, ec_pat_dists) %>% filter(state1 == state2) %>%
mutate(state1 = factor(state1, levels = c('MN','TN','CT')),
pair_type = ifelse(is.na(pair_type), 'Unknown', pair_type),
ST_orig = st1,
ST = ifelse(st1 %in% dominant_sts, st1, 'Other'),
Species = gsub(' ST.*','',st1),
ST = gsub('.* ','',ST))
Intra- and inter-facility pairwise SNV distances
Closely related facility type pairs (Table S2)
pr_hosp_pairs_ct <- left_join(pr_hosp_pairs_ct, snv_dat)
pr_hosp_pairs_mn <- left_join(pr_hosp_pairs_mn, snv_dat)
pr_hosp_pairs_tn <- left_join(pr_hosp_pairs_tn, snv_dat)
nodes <- bind_rows(pr_hosp_pairs_mn %>% select(loc1, state1, st1) %>%
rename(Facility_ID = loc1, state = state1, st = st1),
pr_hosp_pairs_mn %>% select(loc2, state2, st2) %>%
rename(Facility_ID = loc2, state = state2, st = st2),
pr_hosp_pairs_tn %>% select(loc1, state1, st1) %>%
rename(Facility_ID = loc1, state = state1, st = st1),
pr_hosp_pairs_tn %>% select(loc2, state2, st2) %>%
rename(Facility_ID = loc2, state = state2, st = st2),
pr_hosp_pairs_ct %>% select(loc1, state1, st1) %>%
rename(Facility_ID = loc1, state = state1, st = st1),
pr_hosp_pairs_ct %>% select(loc2, state2, st2) %>%
rename(Facility_ID = loc2, state = state2, st = st2)) %>%
filter(!duplicated(.)) %>% filter(!is.na(Facility_ID)) %>%
select(-st)
# mutate(st = gsub('cloacae', 'hormaechei', st),
# st = ifelse(st %in% c('E. hormaechei ST114','E. hormaechei ST171', 'K. pneumoniae ST258', 'K. pneumoniae ST307'), st, 'Other'))
edges <- bind_rows(pr_hosp_pairs_mn %>% rename(from = loc1, to = loc2),
pr_hosp_pairs_tn %>% rename(from = loc1, to = loc2),
pr_hosp_pairs_ct %>% rename(from = loc1, to = loc2)) %>%
filter(pairwise_dist < 10) %>%
select(from, to, same_pr_hosp, st1) %>%
filter(!is.na(to) & !is.na(from)) %>%
mutate(st1 = gsub('cloacae', 'hormaechei', st1),
st1 = ifelse(st1 %in% c('E. hormaechei ST114','E. hormaechei ST171', 'K. pneumoniae ST258', 'K. pneumoniae ST307'), st1, 'Other'),
st1 = factor(st1, levels = c('E. hormaechei ST114','E. hormaechei ST171',
'K. pneumoniae ST258', 'K. pneumoniae ST307', 'Other'),
labels = c('*E. hormaechei* ST114','*E. hormaechei* ST171',
'*K. pneumoniae* ST258', '*K. pneumoniae* ST307', 'Other')),
same_pr_hosp = ifelse(same_pr_hosp, 'Yes','No'))
graph <- tbl_graph(nodes = nodes, edges = edges, directed = FALSE) %>%
induced.subgraph(igraph::degree(.) > 0)
left_join(edges, facil_types %>% rename(from = Facility_ID, type_from = type)) %>%
left_join(facil_types %>% rename(to = Facility_ID, type_to = type)) %>%
mutate(type_pair = sapply(1:nrow(.), function(x) paste0(sort(c(type_from[x], type_to[x])), collapse = '-')),
type_pair = ifelse(type_pair %in% c('ACH-ACH', 'ACH-LTACH', 'ACH-SNF'), type_pair, 'Other')) %>%
group_by(state, st1, type_pair) %>%
summarize(count = n()) %>%
arrange(state, st1, type_pair) %>%
pivot_wider(names_from = type_pair, values_from = count, values_fill = 0) %>%
select(state, st1, `ACH-ACH`, `ACH-LTACH`, `ACH-SNF`, Other)
## # A tibble: 13 × 6
## # Groups: state, st1 [13]
## state st1 `ACH-ACH` `ACH-LTACH` `ACH-SNF` Other
## <chr> <fct> <int> <int> <int> <int>
## 1 CT *K. pneumoniae* ST258 2 0 0 0
## 2 CT *K. pneumoniae* ST307 1 0 0 0
## 3 CT Other 1 0 0 2
## 4 MN *E. hormaechei* ST114 2 5 1 1
## 5 MN *E. hormaechei* ST171 109 92 56 101
## 6 MN *K. pneumoniae* ST258 9 1 6 42
## 7 MN *K. pneumoniae* ST307 1 0 0 2
## 8 MN Other 1 0 0 1
## 9 TN *E. hormaechei* ST114 9 1 0 2
## 10 TN *E. hormaechei* ST171 1 1 0 2
## 11 TN *K. pneumoniae* ST258 13 6 0 0
## 12 TN *K. pneumoniae* ST307 17 0 0 0
## 13 TN Other 6 1 0 3
Intra-facility analysis (Figure S8)
min_dat <- lapply(unique(c(as.character(snv_dat$isolate1), as.character(snv_dat$isolate2))), function(i){
snv_dat %>% filter(pair_type == 'Intra-facility pair' & (isolate1 == i | isolate2 == i)) %>%
group_by(loc1, ST_orig, state1) %>%
summarize(dist_min = min(pairwise_dist))
}) %>% bind_rows() %>% suppressMessages() %>% suppressWarnings()
fs8 <- min_dat %>%
mutate(under_thresh = ifelse(dist_min <= 15, '(10,15]','(15,∞)'),
under_thresh = ifelse(dist_min <= 10, '(5,10]',under_thresh),
under_thresh = ifelse(dist_min <= 5, '[0,5]',under_thresh),
under_thresh = factor(under_thresh, levels = rev(c('[0,5]','(5,10]','(10,15]','(15,∞)'))),
ST = ifelse(ST_orig %in% c('K. pneumoniae ST258','E. hormaechei ST171', 'K. pneumoniae ST307','E. hormaechei ST114'), ST_orig, 'Other'),
ST = factor(ST, levels = c('E. hormaechei ST171', 'E. hormaechei ST114', 'K. pneumoniae ST258', 'K. pneumoniae ST307', 'Other'),
labels = c('*E. hormaechei*<br>ST171', '*E. hormaechei*<br>ST114',
'*K. pneumoniae*<br>ST258', '*K. pneumoniae*<br>ST307', 'Other')),
state1 = factor(state1, levels = c('CT', 'MN', 'TN'))) %>%
ggplot(aes(x = reorder(loc1, loc1, function(x)-length(x)), fill = under_thresh)) +
geom_bar() +
scale_fill_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
labs(y = "Number of isolates", x = "Facility", fill = 'SNV bin of closest\nintra-facility isolate') +
facet_grid(ST~state1, space = 'free', scales = 'free_x') +
theme(axis.text.x = element_blank(), strip.text.y = element_markdown()) +
scale_y_continuous(breaks = scales::pretty_breaks())
ggsave(plot = fs8, filename = 'figures/FigS8.png', width = 7, height = 5)
fs8

Clinical factor analysis (Table S3)
fisher <- function(a,b,c,d){
data <- matrix(c(a,b,c,d),ncol=2)
c(p = fisher.test(data)$p.value,
OR = fisher.test(data)$estimate)
}
intra_pairs_ct <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'CT')
intra_pairs_mn <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'MN')
intra_pairs_tn <- snv_dat %>% filter(loc1 == loc2 & st1 == st2 & state1 == 'TN')
pt_info_ct <- suppressMessages(lapply(1:nrow(intra_pairs_ct), function(x){
sub <- pt_feats_ct %>% filter(Sample_ID %in% unlist(c(intra_pairs_ct[x,c('isolate1','isolate2')]))) %>%
select_if(colSums(is.na(.)) == 0)
info <- bind_cols(intra_pairs_ct[x,])%>% mutate(shared_clin = FALSE)
if(ncol(sub) > 2 & nrow(sub) > 1){
info <- info %>% mutate(shared_clin = TRUE)
}
info
})) %>% bind_rows()
pt_info_mn <- suppressMessages(lapply(1:nrow(intra_pairs_mn), function(x){
sub <- pt_feats_mn %>% filter(Sample_ID %in% unlist(c(intra_pairs_mn[x,c('isolate1','isolate2')]))) %>%
select_if(colSums(is.na(.)) == 0)
info <- bind_cols(intra_pairs_mn[x,])%>% mutate(shared_clin = FALSE)
if(ncol(sub) > 2 & nrow(sub) > 1){
info <- info %>% mutate(shared_clin = TRUE, shared_comorb = paste0(names(sub)[!names(sub) == 'Sample_ID'], collapse = ','))
}
info
})) %>% bind_rows()
pt_info_tn <- suppressMessages(lapply(1:nrow(intra_pairs_tn), function(x){
sub <- pt_feats_tn %>% filter(Sample_ID %in% unlist(c(intra_pairs_tn[x,c('isolate1','isolate2')]))) %>%
unique() %>%
select_if(c(TRUE, colSums(.[,2:ncol(.)]) != 0))
info <- bind_cols(intra_pairs_tn[x,]) %>% mutate(shared_clin = FALSE)
if(ncol(sub) > 2 & nrow(sub) > 1){
info <- info %>% mutate(shared_clin = TRUE, shared_comorb = paste0(names(sub)[!names(sub) == 'Sample_ID'], collapse = ','))
}
info
})) %>% bind_rows()
pt_info_comb <- bind_rows(pt_info_mn, pt_info_tn, pt_info_ct) %>%
mutate(`pairwise_dist <= 10` = pairwise_dist <= 10) %>%
select(state1, st1, shared_clin, `pairwise_dist <= 10`)
pt_comorb_compare <- pt_info_comb %>%
group_by(state1, st1, shared_clin, `pairwise_dist <= 10`) %>% summarize(count = n()) %>%
mutate(sharedclin_pdlt10 = factor(paste0(ifelse(shared_clin, 'shared', 'not shared'), ', ',
ifelse(`pairwise_dist <= 10`, '≤10', '>10')),
levels = c('shared, ≤10', 'shared, >10', 'not shared, ≤10', 'not shared, >10'))) %>%
ungroup() %>%
arrange(sharedclin_pdlt10) %>%
select(-c(shared_clin, `pairwise_dist <= 10`)) %>%
pivot_wider(names_from = sharedclin_pdlt10, values_from = count, values_fill = 0) %>%
group_by(state1, st1) %>%
rowwise() %>%
mutate(fe_p = fisher(`shared, ≤10`, `shared, >10`, `not shared, ≤10`, `not shared, >10`)[[1]]) %>%
arrange(state1, st1) %>%
mutate(frac_shared_lt10 = `shared, ≤10`/(`shared, ≤10` + `not shared, ≤10`),
frac_shared_gt10 = `shared, >10`/(`shared, >10` + `not shared, >10`))
pt_comorb_compare
## # A tibble: 17 × 9
## # Rowwise: state1, st1
## state1 st1 share…¹ share…² not s…³ not s…⁴ fe_p frac_s…⁵ frac_s…⁶
## <fct> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 MN E. hormaechei… 1 0 0 2 0.333 1 0
## 2 MN E. hormaechei… 6 17 80 212 1 0.0698 0.0742
## 3 MN E. hormaechei… 0 0 1 0 1 0 NaN
## 4 MN K. pneumoniae… 0 0 1 0 1 0 NaN
## 5 MN K. pneumoniae… 0 0 1 0 1 0 NaN
## 6 MN K. pneumoniae… 0 3 25 99 1 0 0.0294
## 7 TN E. hormaechei… 1 9 0 0 1 1 1
## 8 TN E. hormaechei… 0 3 0 0 1 NaN 1
## 9 TN E. hormaechei… 1 0 0 0 1 1 NaN
## 10 TN E. hormaechei… 0 1 0 0 1 NaN 1
## 11 TN E. hormaechei… 1 0 0 0 1 1 NaN
## 12 TN E. hormaechei… 0 1 0 0 1 NaN 1
## 13 TN K. pneumoniae… 1 0 0 0 1 1 NaN
## 14 TN K. pneumoniae… 4 8 1 3 1 0.8 0.727
## 15 TN K. pneumoniae… 5 7 3 0 0.200 0.625 1
## 16 CT K. pneumoniae… 0 0 0 1 1 NaN 0
## 17 CT K. pneumoniae… 1 5 0 5 1 1 0.5
## # … with abbreviated variable names ¹`shared, ≤10`, ²`shared, >10`,
## # ³`not shared, ≤10`, ⁴`not shared, >10`, ⁵frac_shared_lt10,
## # ⁶frac_shared_gt10
Inter-facility analysis
Aggregate patient transfer (Figure S9)
mn_pt_trans <- get_patient_flow(mn_trans_net_sub,
unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in%
c(mn_trans_net_sub$source_facil,
mn_trans_net_sub$dest_facil)]) %>%
filter(!duplicated(.))
tn_pt_trans <- get_patient_flow(tn_trans_net_sub, unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in%
c(tn_trans_net_sub$source_facil,
tn_trans_net_sub$dest_facil)]) %>%
filter(!duplicated(.))
ct_pt_trans <- get_patient_flow(ct_trans_net_sub, unique(facil_st$Facility_ID)[unique(facil_st$Facility_ID) %in%
c(ct_trans_net_sub$source_facil,
ct_trans_net_sub$dest_facil)]) %>%
filter(!duplicated(.))
sts <- c('ST171','ST258','ST114','ST307')
snv_by_st <- suppressWarnings(lapply(sts, function(x){
snvdat_sub <- snv_dat %>% filter(ST == x) %>% mutate(pairwise_dist = as.numeric(pairwise_dist))
snvdat_sub <- snvdat_sub[,1:6]
mn_snv_by_st_sub <- summarize_pairs(snvdat_sub) %>% mutate(ST = x)
})) %>% bind_rows()
pt_snv <- bind_rows(bind_cols(state = 'MN', mn_pt_trans),
bind_cols(state = 'TN', tn_pt_trans),
bind_cols(state = 'CT', ct_pt_trans)) %>% inner_join(snv_by_st) %>%
mutate(under_thresh = ifelse(dist_min <= 15, '(10,15]','(15,∞)'),
under_thresh = ifelse(dist_min <= 10, '(5,10]',under_thresh),
under_thresh = ifelse(dist_min <= 5, '[0,5]',under_thresh),
under_thresh = factor(under_thresh, levels = c('[0,5]','(5,10]','(10,15]','(15,∞)')),
species = ifelse(ST %in% c('ST258','ST307'), 'K. pneumoniae','E. hormaechei'))
pvals <- pt_snv %>%
mutate(leq_10_bool = ifelse(dist_min <= 10, 'close','not_close')) %>%
select(state, ST, leq_10_bool, sum_pt_trans_metric) %>% group_by(state, ST, leq_10_bool) %>%
summarize(sum_pt_trans_metric = list(sum_pt_trans_metric)) %>%
spread(leq_10_bool, sum_pt_trans_metric) %>%
group_by(state, ST) %>%
filter(!(is.null(unlist(close)) | is.null(unlist(not_close)))) %>%
mutate(close_median=median(unlist(close)),
not_close_median=median(unlist(not_close)),
p=wilcox.test(unlist(close),unlist(not_close))$p.value) %>%
mutate(species = ifelse(ST %in% c('ST258','ST307'), '*K. pneumoniae*','*E. hormaechei*'))
fs9 <- pt_snv %>%
mutate(species = gsub('^', '\\*',species),
species = gsub('$', '\\*',species),
sum_pt_trans_metric = sum_pt_trans_metric + min(sum_pt_trans_metric[sum_pt_trans_metric != 0]/2), # no infinite values in transformation
dist_min = dist_min + min(dist_min[dist_min != 0]/2)) %>% # no infinite values in transformation
ggplot(aes(x = sum_pt_trans_metric, y = dist_min)) +
geom_point(aes(color = under_thresh)) + scale_y_log10() + scale_x_log10() +
scale_color_manual(
values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")
) +
geom_text(data = pvals, aes(x = 5e-5, y = 1, label = paste0('p=',signif(p, 1))), hjust = 0) +
facet_grid(species+ST~state) +
labs(x = 'Sum(patient flow between facility pair)', y = 'Minimum pairwise SNV distance', color = 'SNV bin') +
theme(strip.text.y = element_markdown())
ggsave(plot = fs9, filename = 'figures/FigS9.png', width = 7, height = 4)
fs9

Prior hospital
Individual healthcare exposures (Table 2)
pr_summary <- bind_rows(pr_hosp_pairs_ct, pr_hosp_pairs_mn, pr_hosp_pairs_tn) %>%
left_join(snv_dat %>% filter(loc1 != loc2)) %>%
mutate(st1 = ifelse(st1 %in% c("E. hormaechei ST114", "E. hormaechei ST171", "K. pneumoniae ST258",
"K. pneumoniae ST307"), st1, 'Other')) %>%
group_by(state1, st1, same_pr_hosp, pairwise_dist <= 10) %>%
summarize(total_l10 = n()) %>%
mutate(same_pr_hosp = ifelse(same_pr_hosp, 'pr_hosp','no_pr_hosp'),
`pairwise_dist <= 10` = ifelse(`pairwise_dist <= 10`, 'lt10','gt10')) %>%
pivot_wider(names_from = c(same_pr_hosp, `pairwise_dist <= 10`), values_from = c(total_l10)) %>%
rename(state = state1, st = st1) %>%
select(state, st, pr_hosp_lt10, no_pr_hosp_lt10, pr_hosp_gt10, no_pr_hosp_gt10) %>%
replace(is.na(.), 0)
pr_summary$fisher_p <- sapply(1:nrow(pr_summary), function(x){
exact2x2(matrix(unlist(c(pr_summary[x,3:6])), nrow = 2))$p.value %>% signif(digits = 1)
})
pr_summary <- pr_summary %>%
mutate(pr_hosp_lt10_pct = round(pr_hosp_lt10/(no_pr_hosp_lt10 + pr_hosp_lt10), 1)*100, .after = no_pr_hosp_lt10) %>%
mutate(pr_hosp_gt10_pct = round(pr_hosp_gt10/(no_pr_hosp_gt10 + pr_hosp_gt10), 1)*100, .after = no_pr_hosp_gt10) %>%
mutate('Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)' =
paste0(pr_hosp_lt10, '/', (pr_hosp_lt10+no_pr_hosp_lt10), ' (',round(pr_hosp_lt10/(pr_hosp_lt10+no_pr_hosp_lt10),2)*100,')'),
'Pairwise SNV distance > 10\nwith shared facility exposure (%)' =
paste0(pr_hosp_gt10, '/', (pr_hosp_gt10+no_pr_hosp_gt10), ' (',round(pr_hosp_gt10/(pr_hosp_gt10+no_pr_hosp_gt10),2)*100,')')) %>%
select(c("state", "st",
"Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)",
"Pairwise SNV distance > 10\nwith shared facility exposure (%)",
"fisher_p"
)) %>%
rename(State = state, ST = st, "Fisher's exact p" = fisher_p)
pr_summary
## # A tibble: 13 × 5
## # Groups: State, ST [13]
## State ST Pairwise SNV distance ≤ 10\nwith …¹ Pairw…² Fishe…³
## <fct> <chr> <chr> <chr> <dbl>
## 1 MN E. hormaechei ST114 3/9 (33) 1/43 (… 1e-2
## 2 MN E. hormaechei ST171 23/430 (5) 62/195… 3e-2
## 3 MN K. pneumoniae ST258 10/62 (16) 23/195… 2e-8
## 4 MN K. pneumoniae ST307 3/3 (100) 0/7 (0) 8e-3
## 5 MN Other 2/4 (50) 0/6 (0) 1e-1
## 6 TN E. hormaechei ST114 4/16 (25) 24/79 … 8e-1
## 7 TN E. hormaechei ST171 1/4 (25) 1/21 (… 3e-1
## 8 TN K. pneumoniae ST258 7/21 (33) 7/369 … 1e-6
## 9 TN K. pneumoniae ST307 5/17 (29) 30/104… 1e+0
## 10 TN Other 7/13 (54) 4/18 (… 1e-1
## 11 CT K. pneumoniae ST258 1/2 (50) 1/197 … 2e-2
## 12 CT K. pneumoniae ST307 0/1 (0) 0/2 (0) 1e+0
## 13 CT Other 0/3 (0) 0/4 (0) 1e+0
## # … with abbreviated variable names
## # ¹`Pairwise SNV distance ≤ 10\nwith shared facility exposure (%)`,
## # ²`Pairwise SNV distance > 10\nwith shared facility exposure (%)`,
## # ³`Fisher's exact p`
MN maps (Figure 3)
# connecting each patient to their closest isolate with a subsequent collection date
edge_dat <- lapply(unique(c(as.character(snv_dat$isolate1), as.character(snv_dat$isolate2))), function(i){
cultdate <- facil_st$Cult_Date[facil_st$Sample_ID == i]
subset_dat <- snv_dat %>% filter(pair_type == 'Inter-facility pair' & (isolate1 == i | isolate2 == i) & state1 == 'MN')
dates <- sapply(unique(c(as.character(subset_dat$isolate1), as.character(subset_dat$isolate2))), function(j){
as.character(facil_st$Cult_Date[facil_st$Sample_ID == j])
})
dat <- NULL
if(length(dates) > 0){
keepers <- names(dates)[as.Date(dates) > cultdate]
dat <- subset_dat %>%
filter(isolate1 %in% keepers | isolate2 %in% keepers) %>%
filter(pairwise_dist == min(pairwise_dist)) %>%
mutate(earlier = as.character(ifelse(isolate2 == i, 'loc2','loc1')),
date1 = dates[as.character(isolate1)],
date2 = dates[as.character(isolate2)],
datediff = abs(as.numeric(as.Date(dates[as.character(isolate1)]) - as.Date(dates[as.character(isolate2)]))))
}
}) %>% bind_rows()
edges_for_plot <- edge_dat %>%
filter(pairwise_dist <= 15) %>%
left_join(facil_st %>% select(Sample_ID, Facility_ID, longitude, latitude),
by = c('isolate1' = 'Sample_ID', 'loc1' = 'Facility_ID')) %>%
rename(x = longitude, y = latitude) %>%
left_join(facil_st %>% select(Sample_ID, Facility_ID, longitude, latitude),
by = c('isolate2' = 'Sample_ID', 'loc2' = 'Facility_ID')) %>%
rename(xend = longitude, yend = latitude) %>%
filter(!(is.na(x) | is.na(xend)) & ST %in% c('ST171','ST258')) %>%
mutate(dist = abs(x - xend)^2 + abs(y - yend)^2)
facilities <- facil_st %>% filter(State == 'MN' & ST %in% c('E. hormaechei ST171', 'K. pneumoniae ST258')) %>%
select(Facility_ID, latitude, longitude) %>%
filter(!duplicated(.) & !is.na(latitude))
edges_for_plot_ec <- edges_for_plot %>% filter(ST == 'ST171') %>%
mutate(under_thresh = factor(under_thresh, levels = c("[0,5]", "(5,10]", "(10,15]")))
whole_state_ec <- facil_st %>%
mutate(ST = gsub('cloacae', 'hormaechei', ST)) %>%
filter(ST %in% c('E. hormaechei ST171')) %>%
mutate(ST = gsub('.* ST', 'ST', ST)) %>%
group_by(Facility_ID, latitude, longitude, ST) %>% tally() %>%
ggplot(aes(x=longitude, y=latitude), color = "grey99") +
geom_curve(aes(x = xend, y = yend, xend = x, yend = y, color = under_thresh), # draw edges as arcs
alpha = 1,
data = edges_for_plot_ec, curvature = 0.33) +
scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4")) +
geom_point(data = facilities, aes(x = longitude, y = latitude), alpha = 0.25, size = 1) +
geom_point(aes(size=n), alpha = 1, color = 'white') +
geom_point(aes(size=n), fill = adjustcolor(st_cols[c('E. hormaechei ST171')], alpha.f = 0.5), pch = 21) +
scale_size_continuous(breaks = seq(0, 20, 5), limits = c(0,25),
labels = c(0,5,10,15,20),
name = 'Number of\nisolates') +
coord_quickmap() +
# geom_rect(aes(xmin = -986875.33, xmax = -986875.165, ymin = 896963.81, ymax = 896964.1),
geom_rect(aes(xmin = -986875.33+0.58, xmax = -986875.165+0.58, ymin = 896963.81+0.145, ymax = 896964.1+0.145),
fill = "transparent", color = "lightgrey", size = 1) +
labs(color = 'SNV bin', size = 'Number of\nisolates') +
guides(size = guide_legend(override.aes = list(alpha = c(0.25, 1, 1, 1, 1), fill = 'black'))) +
theme_void()
whole_state_ec

zoom_state_ec <- whole_state_ec +
# coord_quickmap(xlim=c(-986875.33+0.007, -986875.165-0.007), ylim=c(896963.81+0.012, 896964.1-0.012)) +
coord_quickmap(xlim=c(-986875.33+0.58+0.007, -986875.165+0.58-0.007), ylim=c(896963.81+0.145+0.013, 896964.1+0.145-0.013)) +
theme(legend.position = "none") +
geom_text(aes(label = ifelse(Facility_ID == 'F38', 'W', NA)),hjust=1.5, vjust=2)
zoom_state_ec

edges_for_plot_kp <- edges_for_plot %>% filter(ST == 'ST258')
whole_state_kp <- facil_st %>%
filter(ST %in% c('K. pneumoniae ST258')) %>%
mutate(ST = gsub('.* ST', 'ST', ST)) %>%
group_by(Facility_ID, latitude, longitude, ST) %>% tally() %>%
ggplot(aes(x=longitude, y=latitude), color = "grey99") +
geom_curve(aes(x = xend, y = yend, xend = x, yend = y, color = under_thresh), # draw edges as arcs
alpha = 1,
data = edges_for_plot_kp, curvature = 0.33) +
scale_color_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
geom_point(data = facilities, aes(x = longitude, y = latitude), size = 1, alpha = 0.25) +
geom_point(aes(size=n), alpha = 1, color = 'white') +
geom_point(aes(size=n), fill = adjustcolor(st_cols['K. pneumoniae ST258'], alpha.f = 0.5), pch = 21) +
scale_size_continuous(breaks = seq(5, 20, 5), limits = c(1,25)) +
coord_quickmap() +
# geom_rect(aes(xmin = -986875.33, xmax = -986875.165, ymin = 896963.81, ymax = 896964.1),
geom_rect(aes(xmin = -986875.33+0.58, xmax = -986875.165+0.58, ymin = 896963.81+0.145, ymax = 896964.1+0.145),
fill = "transparent", color = "lightgrey", size = 1) +
labs(size = 'Number of\nisolates', color = 'SNV bin') +
theme_void()
whole_state_kp

zoom_state_kp <- whole_state_kp +
# coord_quickmap(xlim=c(-986875.33+0.007, -986875.165-0.007), ylim=c(896963.81+0.012, 896964.1-0.012)) +
coord_quickmap(xlim=c(-986875.33+0.58+0.007, -986875.165+0.58-0.007), ylim=c(896963.81+0.145+0.013, 896964.1+0.145-0.013)) +
theme(legend.position = "none") +
geom_text(aes(label = ifelse(Facility_ID == 'F38', 'W', NA)),hjust=1.5, vjust=2)
zoom_state_kp

# grob plotting function to plot panel colors
plot_panel_cols <- function(p, fills){
gt <- ggplot_gtable(ggplot_build(p))
strip_both <- which(grepl('strip-', gt$layout$name))
k <- 1
for (i in strip_both) {
j <- which(grepl('rect', gt$grobs[[i]]$grobs[[1]]$childrenOrder))
gt$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1
}
ggplotify::as.ggplot(gt)
}
bp <- edge_dat %>% pivot_longer(cols = c('loc1','loc2'), names_to = 'fname', values_to = 'loc') %>%
filter(ST %in% c('ST171','ST258') & under_thresh != '(15,∞)') %>%
mutate(under_thresh = factor(under_thresh, levels = rev(levels(under_thresh))),
ST = factor(ifelse(ST == 'ST171', '*E. hormaechei* ST171', '*K. pneumoniae* ST258'),
levels = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258'))) %>%
filter(fname == earlier) %>%
select(loc, ST, under_thresh) %>%
ggplot(aes(x = loc)) +
geom_bar(aes(col = ifelse(loc == 'F38', 'yes','no'))) +
geom_bar(aes(fill = under_thresh)) +
facet_grid(ST~.) +
scale_fill_manual(values = c('[0,5]' = "#9EBCDA",'(5,10]' = "#BFD3E6", '(10,15]' = "#E0ECF4",'(15,∞)' = "#FBB4AE")) +
scale_color_manual(values = c('yes' = 'black','no' = NA)) +
labs(x = 'Facility', y = 'Number of isolates closely related\nto an isolate from another facility', fill = 'SNV bin')
bp_labeled <- bp + scale_x_discrete(labels = ifelse(ggplot_build(bp)$layout$panel_params[[1]]$x$breaks == 'F38', 'W','')) +
theme_classic() +
theme(legend.position = 'none', #axis.ticks.x = element_blank(),
text = element_text(size = 10),
axis.text.x = element_text(angle = 0),
strip.background = element_rect(fill="white",linetype='blank'),
strip.text.y = element_markdown())
bp_col <- plot_panel_cols(bp_labeled, adjustcolor(st_cols[c('E. hormaechei ST171','K. pneumoniae ST258')], alpha.f = 0.5))
leg <- ggpubr::get_legend(whole_state_ec +
theme(legend.box.margin = margin(0, 0, 0, 50))
)
f3 <- plot_grid(leg, NULL,
plot_grid(whole_state_ec + theme(legend.position = 'none'), zoom_state_ec,
NULL, NULL,
whole_state_kp + theme(legend.position = 'none'), zoom_state_kp,
NULL, NULL,
nrow = 4,
rel_heights = c(1,0.05,1,0.1,1,0.05,1,0.1)),
plot_grid(NULL, bp_col, NULL, ncol = 1, rel_heights = c(0.1,0.8,0.05)), nrow = 1,
rel_widths = c(0.2, 0.05, 1, 0.8)
)
ggsave(plot = f3, filename = 'figures/Fig3.png', width = 10, height = 4)
f3

# prior facilities for facility w/ st171 isolates
facil_w %>%
mutate(n_patients_tot = n_distinct(Sample_ID)) %>%
filter(!is.na(prior_hosp_alias)) %>%
unique() %>%
group_by(prior_hosp_alias, n_patients_tot) %>%
summarize(n_patients = n()) %>%
ungroup() %>%
mutate(prop_patients = n_patients/n_patients_tot) %>%
arrange(-n_patients)
## # A tibble: 11 × 4
## prior_hosp_alias n_patients_tot n_patients prop_patients
## <dbl> <int> <int> <dbl>
## 1 4 24 10 0.417
## 2 2 24 2 0.0833
## 3 1 24 1 0.0417
## 4 3 24 1 0.0417
## 5 5 24 1 0.0417
## 6 6 24 1 0.0417
## 7 7 24 1 0.0417
## 8 8 24 1 0.0417
## 9 9 24 1 0.0417
## 10 10 24 1 0.0417
## 11 11 24 1 0.0417
Comparison of ST171 and ST258 inter-facility transmission patterns
(Figure S12)
## for each facility, how many times it's involved in a link
frac_ec <- lapply(unique(edges_for_plot_ec$loc1, edges_for_plot_ec$loc2), function(x){
edges_for_plot_ec %>% filter(pairwise_dist <= 10) %>%
mutate(conn = ifelse(loc1 == !!x | loc2 == !!x, 'connected', 'not')) %>%
group_by(conn, ST) %>% summarize(count = n()) %>% mutate(facil = x)
}) %>% bind_rows() %>% suppressMessages() %>% ungroup() %>% filter(!duplicated(.)) %>%
group_by(facil) %>%
mutate(n_facil = sum(count), frac = count/n_facil) %>%
ungroup() %>%
filter(conn == 'connected')
frac_kp <- lapply(unique(edges_for_plot_kp$loc1, edges_for_plot_kp$loc2), function(x){
edges_for_plot_kp %>% filter(pairwise_dist <= 10) %>%
mutate(conn = ifelse(loc1 == !!x | loc2 == !!x, 'connected', 'not')) %>%
group_by(conn, ST) %>% summarize(count = n()) %>% mutate(facil = x)
}) %>% bind_rows() %>% suppressMessages() %>% ungroup() %>% filter(!duplicated(.)) %>%
group_by(facil) %>%
mutate(n_facil = sum(count), frac = count/n_facil) %>%
ungroup() %>%
filter(conn == 'connected')
max(frac_ec$frac)
## [1] 0.74
facils_ec <- frac_ec %>% pull(facil)
ec_perm_cts <- lapply(1:1000, function(x){
sample(facils_ec, sum(frac_ec$count), replace = TRUE) %>% table() %>% data.frame() %>% rename(facil = '.') %>%
mutate(tot = sum(Freq), perm = x, frac = Freq/tot) %>% slice_max(Freq) %>% slice(1)
}) %>% bind_rows()
max(frac_kp$frac)
## [1] 0.2727273
facils_kp <- frac_kp %>% pull(facil)
kp_perm_cts <- lapply(1:1000, function(x){
sample(facils_kp, sum(frac_kp$count), replace = TRUE) %>% table() %>% data.frame() %>% rename(facil = '.') %>%
mutate(tot = sum(Freq), perm = x, frac = Freq/tot) %>% slice_max(Freq) %>% slice(1)
}) %>% bind_rows()
fs12 <- bind_rows(ec_perm_cts %>% select(frac) %>% mutate(ST = 'ST171'),
kp_perm_cts %>% select(frac) %>% mutate(ST = 'ST258')) %>%
mutate(ST = factor(ifelse(ST == 'ST171', '*E. hormaechei* ST171', '*K. pneumoniae* ST258'),
levels = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258'))) %>%
ggplot(aes(x = frac)) +
geom_histogram(binwidth = 0.05) +
geom_point(data = tibble(frac = c(max(frac_ec$frac), max(frac_kp$frac)),
st = c('*E. hormaechei* ST171', '*K. pneumoniae* ST258')),
aes(y = 0), col = 'red', shape = 18, size = 5) +
facet_grid(~st, scales = 'free') +
labs(x = 'Maximum proportion of edges connected to a given facility', y = 'Number of edges') +
theme(strip.text.x = element_markdown())
sum(ec_perm_cts$frac > max(frac_ec$frac))/length(ec_perm_cts$frac)
## [1] 0
sum(kp_perm_cts$frac > max(frac_kp$frac))/length(kp_perm_cts$frac)
## [1] 0.301
fs12 <- plot_panel_cols(fs12, adjustcolor(st_cols[c('E. hormaechei ST171','K. pneumoniae ST258')], alpha.f = 0.5))
# ggsave(plot = fs12, filename = 'figures/FigS12.png', width = 7, height = 4)
fs12

TN facility cluster (Figure 4)
prior_hosps_tn_counts <- prior_hosps_tn %>%
select(Sample_ID, Fac_ID) %>%
unique() %>%
rename(Facility = Fac_ID) %>%
bind_rows(prior_hosps_tn %>%
select(Sample_ID, Prior_facil) %>%
rename(Facility = Prior_facil)) %>%
left_join(facil_st %>%
mutate(ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
'K. pneumoniae ST258','K. pneumoniae ST307',
'E. cloacae ST171', 'E. cloacae ST114',
'ST258', 'ST307', 'ST114', 'ST171'),
ST, 'Other'))) %>%
filter(!is.na(ST)) %>%
group_by(ST, Facility) %>%
summarize(count = n()) %>%
arrange(-count)
tn_hm <- prior_hosps_tn_counts %>%
pivot_wider(names_from = Facility, values_from = count, values_fill = 0) %>%
data.frame(row.names = .$ST) %>%
select(-ST) %>%
pheatmap(show_colnames = FALSE, fontsize_col = 8)

tn_facil_hm <- prior_hosps_tn_counts %>%
mutate(Facility = factor(Facility, levels = rev(tn_hm$tree_col$labels[tn_hm$tree_col$order]) %>% gsub('TDH.', 'TDH-', .) %>% gsub('LONG.', 'LONG-', .) %>% gsub('VA.', 'VA-', .) %>% gsub('\\.', ' ', .)),
ST = factor(ST, levels = tn_hm$tree_row$labels),
ST = gsub('E. hormaechei', '*E. hormaechei*', ST),
ST = gsub('K. pneumoniae', '*K. pneumoniae*', ST)) %>%
ggplot(aes(x = Facility, y = ST, fill = count)) +
geom_tile() +
geom_rect(aes(xmin = 0+0.5, xmax = 7+0.5, ymin = 0+0.5, ymax = 3+0.5), fill = NA, col = 'black', size = 1, linetype = 'dotted') +
scale_fill_viridis_c(direction = -1, breaks = scales::pretty_breaks(), option = 'magma') +
theme_classic() +
theme(axis.text.x = element_blank(), axis.text.y = element_markdown()) +
labs(fill = 'Count')
tn_facil_hm

tn_facils_st <- facil_st %>% filter(State == 'TN') %>%
mutate(ST = ifelse(ST %in% c('E. hormaechei ST171','E. hormaechei ST114',
'K. pneumoniae ST258','K. pneumoniae ST307',
'E. cloacae ST171', 'E. cloacae ST114',
'ST258', 'ST307', 'ST114', 'ST171'),
ST, 'Other')) %>%
filter(!is.na(Facility_ID)) %>%
group_by(Facility_ID, ST) %>%
summarize(count = n()) %>%
arrange(Facility_ID) %>%
group_by(Facility_ID) %>%
mutate(max_count = count[which.max(count)],
n_isolates = sum(count)) %>%
group_by(Facility_ID, n_isolates) %>%
summarize(max_sts = str_c(ST[count == max_count], collapse = ', ')) %>%
left_join(tn_facil_cluster) %>%
mutate(facil_st_type = case_when(max_sts == 'K. pneumoniae ST258' ~ '*K. pneumoniae* ST258',
grepl('K. pneumoniae ST258', max_sts) & max_sts != 'K. pneumoniae ST258' ~ '*K. pneumoniae* ST258 & Other',
of_interest ~ 'ST307/ST114/Other facility cluster'),
facil_st_type = ifelse(is.na(facil_st_type), 'Other', facil_st_type))
graph_tn <- tbl_graph(nodes = tn_facils_st, edges = tn_trans_net_sub %>% filter(!grepl('I', source_facil) & !grepl('I', dest_facil) & n_transfers > 1), directed = TRUE) %>%
induced.subgraph(igraph::degree(.) > 0)
tn_network <- graph_tn %>% ggraph(layout = 'kk') +
geom_edge_link(aes(color = n_transfers, alpha = n_transfers), arrow = arrow(length = unit(3, 'mm'))) +
geom_node_point(aes(color = facil_st_type, size = n_isolates)) +
scale_edge_color_viridis(direction = -1, option = 'E') +
scale_color_manual(values = c(unname(st_cols[1]), 'coral3', 'lightsalmon', 'grey')) +
guides(edge_alpha = 'none') +
theme_graph() +
labs(color = 'Most common ST', shape = '', size = 'Number of isolates',
edge_color = 'Number of transfers (in thousands)') #+
# theme(legend.text = element_markdown())
tn_network

tn_facil_transfers <- tn_trans_net_sub %>%
left_join(tn_facils_st %>% rename(source_of_interest = of_interest) %>%
select(Facility_ID, source_of_interest),
by = c(source_facil = 'Facility_ID')) %>%
left_join(tn_facils_st %>% rename(dest_of_interest = of_interest) %>%
select(Facility_ID, dest_of_interest),
by = c(dest_facil = 'Facility_ID')) %>%
replace(is.na(.), 0) %>%
mutate(of_interest = source_of_interest + dest_of_interest) %>%
filter(!grepl('I', source_facil) & !grepl('I', dest_facil) & n_transfers > 1) %>%
ggplot(aes(x = n_transfers, linetype = factor(of_interest, levels = c(2, 1, 0)))) +
geom_density() +
labs(x = 'Number of patient transfers', y = 'Density', linetype = 'Number of isolates\nof pair in facility\ncluster') +
theme(legend.position = 'left')
tn_facil_transfers

plot_grid(plot_grid(tn_facil_hm + theme(legend.position = 'left'), tn_facil_transfers, nrow = 2, align = 'hv', labels = c('A', 'C')), tn_network, nrow = 1, rel_widths = c(0.8, 1), labels = c('', 'B')) %>%
ggsave(filename = 'figures/Fig4_test.png', width = 12, height = 5)